\documentclass{beamer}
\usepackage{beamerthemeshadow}
\usepackage{listings}
\usepackage[caption=false]{subfig}
\usepackage{multirow}
\usepackage{float}
\usepackage{caption}
\usepackage{tikz}
\usetikzlibrary{fit, arrows, decorations.markings, positioning}


\title[Framework for Q\&A and Application to BI]{\textsc{QUASL}: A Framework for Question Answering and its Application to Business Intelligence}
\subtitle{Paper submitted to industrial track of EDBT '13 \\in collaboration with F. Brauer, SAP AG}
\author{Nicolas Kuchmann-Beauger}
\institute{SAP France -- \'Ecole Centrale Paris}
\date{\today}

\begin{document}
\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Outline}
\tableofcontents
\end{frame}

\section{Introduction}
\subsection{Context}

\frame{
\frametitle{Search360}
QUASL, also called ``Search360'', is a project from SAP Research that brings together different techniques:
\begin{itemize}
\item \textbf{Question Answering (Q\&A)}
\item Natural Language (NL): speech-to-text and \textbf{NL processing}
\item Recommender Systems
\item Machine Learning
\item In-memory databases (SAP HANA)
\item \ldots
\end{itemize}
We are a team of approx.\ 10 people, including up to 4 PhD students.
}

\frame{
\frametitle{Example data warehouse}
\begin{figure}
\centering
\includegraphics[scale=0.5,trim=0pt 230pt 0pt 40pt]{data-schema}
\caption{Data model of a warehouse: two fact tables and dimensions}
\end{figure}
}

\subsection{Problem statement}
\begin{frame}[fragile]
\frametitle{Problem Statement}

\newsavebox{\firstlisting}
\begin{lrbox}{\firstlisting}
\begin{lstlisting}[basicstyle=\ttfamily\tiny]
SELECT
 sum(Invoice_Line."DAYS"
  * Invoice_Line."NB_GUESTS"
  * Service."PRICE") AS revenue,
 Customer."LAST_NAME" AS customer
FROM City
INNER JOIN Customer
 ON (City."CITY_ID"=Customer."CITY_ID")
INNER JOIN Sales
 ON (Sales."CUST_ID"=Customer."CUST_ID")
INNER JOIN Invoice_Line
 ON (Invoice_Line."INV_ID"=Sales."INV_ID")
INNER JOIN Service
 ON (Invoice_Line."SERVICE_ID"=Service."SERVICE_ID")
WHERE
 city = 'Palo Alto' AND
 age >= 20 AND
 age <= 30
GROUP BY
 customer
ORDER BY revenue
LIMIT 5
\end{lstlisting}
\end{lrbox}

\begin{figure}
\vspace{-1.4cm}
%\centering
\subfloat[User's question and derived semantic units]{
\begin{minipage}[c][1\width]{0.4\textwidth}
\centering
\vspace{-0.7cm}
\includegraphics[trim=250pt 250pt 300pt 0pt,scale=0.4]{running-example}
\end{minipage}
}
%\caption{A user's question and derived semantic units}
%\end{figure}
%
%\begin{figure}
\subfloat[Example SQL query generated from user's question]{
\begin{minipage}[c][1\width]{0.6\textwidth}
\centering
\usebox{\firstlisting}
\end{minipage}
}
%\caption{Example SQL generated from the user's question} 
\end{figure}
\end{frame}

\section{Constraints mapping}
\subsection{Structural constraints}


\frame[plain]{
\begin{figure}
\centering
\includegraphics[scale=0.6,trim=100pt 150pt 0pt 175pt]{parse-graph}
\end{figure}
}

\frame{
\frametitle{Structural constraints}
\begin{figure}
\includegraphics[scale=0.6,trim=80pt 0pt 70pt 200pt]{tmp}
\end{figure}
}


\subsection{Mapping to structured queries}
\frame{
\frametitle{Mapping to structured queries}
\begin{equation}
Q_1=\left[
\begin{array}{lcl}
\textnormal{data source} & = & \textnormal{Resorts}\\
\textnormal{dimensions} & = & \{\textnormal{Customer}\}\\
\textnormal{measures} & = & \{\textnormal{Revenue}\}\\
\textnormal{filters} & = &
\left\{
\begin{array}{lcl}
\textnormal{City} & = & \textnormal{`Palo
Alto'},\\
\textnormal{Age} & \geq & 20,\\
\textnormal{Age} & \leq & 30
\end{array}
\right\}
\\
\textnormal{truncation} & = &
\{(\textnormal{Revenue},\downarrow,5)\}
\end{array}
\right]
\end{equation}

\begin{equation}
Q_2=\left[
\begin{array}{lcl}
\ldots\\
\textnormal{measures} & = & \{\textnormal{Margin}\}\\
\ldots\\
\textnormal{truncation} & = &
\{(\textnormal{Margin},\downarrow,5)\}
\end{array}
\right]
\end{equation}
}

\section{Metrics \& evaluation}
\subsection{Scoring}
\begin{frame}
\frametitle{Scoring}
Some scoring metrics:
\begin{itemize}
\item entity recognition confidence
\[s_1(r)=\sum^{k}_{i=0}\frac{\theta_{t} c_{i,t}}{k}\]
\item complexity
\[s_2(r)=\frac{1}{|T|}\sum_{t\in T}\theta_t r_{i,t}^\prime\]
\item selectivity
\[s_3(r)=\left\{\begin{array}{ll}\frac{1}{\left|R(g,q)\right|} &
\textnormal{if }\sigma\neq 0\\
0 & \textnormal{otherwise}
\end{array}\right.\]
\end{itemize}
\end{frame}

\subsection{Evaluation approach}
\frame{
\frametitle{ManyEyes platform}
\begin{itemize}
\item ManyEyes\texttrademark: IBM's collaborative platform for datasets and charts
\item charts are created by real users, and are associated with metadata (e.g.\ a title)
\end{itemize}
\underline{Idea:} use the title as a query, and compare the charts generated by our system with the chart chosen by the user
}


\subsection{Evaluation corpus}
\begin{frame}
\frametitle{Evaluation corpus}
\begin{table}
\centering
\tiny
\begin{tabular}{lllll}\hline
\multicolumn{1}{c}{\textbf{Query}} & 
\multicolumn{1}{c}{\textbf{Updated query}} & &
\multicolumn{1}{c}{\textbf{Entities}} & 
\multicolumn{1}{c}{\textbf{Comment}}\\\hline\hline
State Population Change & & & $\{\textnormal{State, Population change}\}$ &
\\\hline
Home Ownership & Owner-occupied & &
$\{\textnormal{State, Owner-occupied}$ & \multirow{2}{*}{home $\sim$ dwellings}\\
by State & dwellings by state & & $\textnormal{dwellings}\}$ & \\\hline
 USA States information & & & $\{\textnormal{State}\}$ & \\\hline
 Generation Y in  & population by year & &
 \multirow{2}{*}{$\{\textnormal{Year, Population}\}$} &
 generation $\sim$ demographic 
 \\
 2010 (Ages 10-32) & for ages 10-32 & & & info.\\\hline
 And the whitest name & \multirow{2}{*}{names white percent} & &
 \multirow{2}{*}{$\{\textnormal{Name, White percent}\}$} &
 \multirow{2}{*}{whitest $\sim$ white} \\
 in America is & & & & \\\hline
 40+ Population Projections & & & $\{\textnormal{Age,
 Population}$ & \\
 by Age & & & $\textnormal{projection}\}$ & \\\hline
 Average Time Spent & & & $\{\textnormal{State}, \textnormal{Median travel}$ &
 \\
Commuting by State & & & $\textnormal{time to work}\}$ & \\\hline
\end{tabular}
\end{table}
\end{frame}

\subsection{Evaluation results}
\frame{
\frametitle{Evaluation results}
\begin{figure}
\centering
\includegraphics[scale=0.5,trim=0pt 100pt 0pt 180pt]{chart2}
\end{figure}
}

\section{State of the art}
\frame{
\frametitle{State of the Art}
\begin{itemize}
\item Natural Language Interfaces
\item Keyword Search over structured data
\item in BI:
\begin{itemize}
\item \textsc{Soda}: L. Blunschi et al. ``SODA: Generating SQL for Business Users'', VLDB 2012
\item \textsc{Safe}: G. Orsi et al. ``Keyword-based, context-aware selection of Natural Language query patterns'', EDBT/ICDT 2011
\item C. Unger et al. ``Template-based Question Answering over RDF Data'', WWW 2012
\end{itemize}
\end{itemize}
}


\frame{
\frametitle{Conclusion}
\begin{itemize}
\item system implemented as a framework for domain-specific Q\&A systems
\item application to BI: we generate a \textit{dashboard} on the basis of a user's query
\item approach: constraint-matching and mapping; uses standards (RDF, \textsc{SparQL})
\item results: performance similar to \textsc{WolframAlpha}
\item system is currently being evaluated by real customers, with the aim of eventually becoming a product based on their feedback
\end{itemize}
Future work:
\begin{itemize}
\item add machine learning techniques to improve the ranking
\item semi-automate the system setup (authoring tool)
\end{itemize}
}


\end{document}
